library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
library(censusapi)
library(zoo)
# Load every quarterly PG&E usage-by-ZIP CSV (electric and gas) and stack
# them into one data frame, which is then cached to disk as an RDS file.
years <- 2017:2020
quarters <- 1:4
types <- c("Electric", "Gas")

# Only the first two quarters are read for 2020. This is kept separate from
# `quarters` so the full-year definition is not overwritten part-way through.
partial_year <- 2020
partial_year_quarters <- 1:2

# Collect each file in a list and bind once at the end, rather than growing
# the data frame (and rewriting the RDS) on every iteration.
pge_files <- list()

for (year in years) {
  year_quarters <- if (year == partial_year) partial_year_quarters else quarters

  for (quarter in year_quarters) {
    for (type in types) {
      filename <- paste0(
        "PGE_", year, "_Q", quarter, "_", type, "UsageByZip.csv"
      )
      print(filename)

      pge_files[[length(pge_files) + 1]] <- read_csv(filename)
    }
  }
}

pge_17_to_20_elec_and_gas <- bind_rows(pge_files)

# Written once, after all files have been combined.
saveRDS(pge_17_to_20_elec_and_gas, "pge_17_to_20_elec_and_gas.rds")
## [1] "PGE_2017_Q1_ElectricUsageByZip.csv"
## [1] "PGE_2017_Q1_GasUsageByZip.csv"
## [1] "PGE_2017_Q2_ElectricUsageByZip.csv"
## [1] "PGE_2017_Q2_GasUsageByZip.csv"
## [1] "PGE_2017_Q3_ElectricUsageByZip.csv"
## [1] "PGE_2017_Q3_GasUsageByZip.csv"
## [1] "PGE_2017_Q4_ElectricUsageByZip.csv"
## [1] "PGE_2017_Q4_GasUsageByZip.csv"
## [1] "PGE_2018_Q1_ElectricUsageByZip.csv"
## [1] "PGE_2018_Q1_GasUsageByZip.csv"
## [1] "PGE_2018_Q2_ElectricUsageByZip.csv"
## [1] "PGE_2018_Q2_GasUsageByZip.csv"
## [1] "PGE_2018_Q3_ElectricUsageByZip.csv"
## [1] "PGE_2018_Q3_GasUsageByZip.csv"
## [1] "PGE_2018_Q4_ElectricUsageByZip.csv"
## [1] "PGE_2018_Q4_GasUsageByZip.csv"
## [1] "PGE_2019_Q1_ElectricUsageByZip.csv"
## [1] "PGE_2019_Q1_GasUsageByZip.csv"
## [1] "PGE_2019_Q2_ElectricUsageByZip.csv"
## [1] "PGE_2019_Q2_GasUsageByZip.csv"
## [1] "PGE_2019_Q3_ElectricUsageByZip.csv"
## [1] "PGE_2019_Q3_GasUsageByZip.csv"
## [1] "PGE_2019_Q4_ElectricUsageByZip.csv"
## [1] "PGE_2019_Q4_GasUsageByZip.csv"
## [1] "PGE_2020_Q1_ElectricUsageByZip.csv"
## [1] "PGE_2020_Q1_GasUsageByZip.csv"
## [1] "PGE_2020_Q2_ElectricUsageByZip.csv"
## [1] "PGE_2020_Q2_GasUsageByZip.csv"
# Build the set of ZIP code tabulation areas (ZCTAs) whose centroid falls
# inside one of the nine Bay Area counties.
ca_counties <- counties("CA", cb = TRUE, progress_bar = FALSE)

bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco", # was misspelled, so this county never matched NAME
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )

bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)

usa_zips <-
  zctas(cb = TRUE, progress_bar = FALSE)

# Select ZCTAs by centroid (spatial subset against the county polygons),
# then drop the point geometry and join the original ZCTA polygons back on
# so the result carries polygon shapes rather than centroids.
bay_zips <-
  usa_zips %>%
  st_centroid() %>%
  .[bay_counties, ] %>%
  st_set_geometry(NULL) %>%
  left_join(usa_zips %>% select(GEOID10), by = "GEOID10") %>%
  st_as_sf()
# Monthly Bay Area energy use by customer class, with electricity (kWh) and
# gas (therms) converted to a common unit (kBTU) so they can be compared.
pge_final <-
  pge_17_to_20_elec_and_gas %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial",
        "Gas- Residential",
        "Gas- Commercial"
      ),
    # Keep only ZIP codes that belong to the Bay Area ZCTA set.
    ZIPCODE %in% bay_zips$ZCTA5CE10
  ) %>%
  mutate(
    TOTALKBTU = case_when(
      # 1 kWh = 3.412 kBTU
      !is.na(TOTALKWH) ~ TOTALKWH * 3.412,
      # 1 therm is about 99,976 BTU, i.e. 99.976 kBTU. The previous factor
      # of 999.76 overstated gas use tenfold relative to electricity.
      !is.na(TOTALTHM) ~ TOTALTHM * 99.976
    )
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    MONTHLYKBTU = sum(TOTALKBTU, na.rm = TRUE)
  ) %>%
  # Drop the leftover grouping so later steps operate on a plain tibble.
  ungroup() %>%
  # Year-month value used as the time axis of the chart.
  mutate(Date = as.yearmon(paste(YEAR, MONTH, sep = "-")))
# Stacked bar chart of monthly kBTU, one fill colour per customer class,
# shown as an interactive plotly widget.
pge_chart <-
  ggplot(data = pge_final) +
  geom_bar(
    aes(x = Date, y = MONTHLYKBTU, fill = CUSTOMERCLASS),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    title = "PG&E Bay Area Monthly Electricity and Gas Usage, 2017-2020",
    y = "Monthly kBTU",
    fill = "Electricity Type"
  )

ggplotly(pge_chart)
Based on the displayed chart, there are some observable changes in energy consumption that may be attributable to the COVID-19 pandemic. Comparing April-June 2019 consumption to April-June 2020 consumption, the most obvious difference is the higher use of residential gas in April 2020 compared to April 2019. This fairly observable difference could be due to the fact that April was the first full month of California’s stay-at-home order, so it’s possible that April was the month when the largest number of people were home. However, we don’t see as large a difference in residential electricity use. Other observable trends include: lower commercial gas use in May-June 2020 than in May-June 2019, higher residential electricity use in April-June 2020 than in April-June 2019, and lower commercial electricity use in May-June 2020 than in May-June 2019.
# Percent change in April-June electricity use (kWh) from 2019 to 2020 for
# one customer class, per ZIP code, returned as an sf object in EPSG:4326.
#
# usage:          combined PG&E usage table (needs YEAR, MONTH, ZIPCODE,
#                 CUSTOMERCLASS and TOTALKWH columns).
# zips:           sf object of ZIP polygons with a GEOID10 column.
# customer_class: CUSTOMERCLASS value to keep, e.g. "Elec- Residential".
#
# ZIP codes with a missing or zero total in either year are dropped, since
# no meaningful percent change can be computed for them.
covid_kwh_change_by_zip <- function(usage, zips, customer_class) {
  usage %>%
    filter(
      YEAR %in% 2019:2020,
      MONTH %in% 4:6,
      CUSTOMERCLASS == customer_class
    ) %>%
    # GEOID10 is character, so ZIPCODE must be too for the join below.
    mutate(ZIPCODE = as.character(ZIPCODE)) %>%
    group_by(ZIPCODE, YEAR) %>%
    summarize(TOTALKWH = sum(TOTALKWH, na.rm = TRUE)) %>%
    ungroup() %>%
    # One row per ZIP with a column per year.
    pivot_wider(
      names_from = YEAR,
      values_from = TOTALKWH
    ) %>%
    rename(KWH2019 = "2019", KWH2020 = "2020") %>%
    # Remove unusable baselines before dividing, so percent_change never
    # contains Inf/NaN.
    filter(
      !is.na(KWH2019),
      !is.na(KWH2020),
      KWH2019 != 0,
      KWH2020 != 0
    ) %>%
    mutate(
      percent_change = (KWH2020 - KWH2019) / KWH2019 * 100
    ) %>%
    # Attach ZIP polygons; ZIPs outside `zips` are dropped.
    inner_join(
      zips %>% select(GEOID10),
      by = c("ZIPCODE" = "GEOID10")
    ) %>%
    st_as_sf() %>%
    st_transform(4326)
}

pge_res_elec_covid <-
  covid_kwh_change_by_zip(
    pge_17_to_20_elec_and_gas,
    bay_zips,
    "Elec- Residential"
  )

pge_comm_elec_covid <-
  covid_kwh_change_by_zip(
    pge_17_to_20_elec_and_gas,
    bay_zips,
    "Elec- Commercial"
  )
# Choropleth of the 2019 -> 2020 percent change in April-June residential
# electricity use, by ZIP code.
# NOTE(review): values outside the bin breaks are presumably drawn with
# colorBin's NA colour - confirm the breaks cover the data range.
res_pal <- colorBin(
  palette = "PiYG",
  domain = pge_res_elec_covid$percent_change,
  bins = c(-75, -20, -10, 0, 10, 20, 30)
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = pge_res_elec_covid,
    fillColor = ~res_pal(percent_change),
    color = "white",
    opacity = 1,
    # Opacity values must lie in [0, 1]; the previous 1.5 was out of range.
    fillOpacity = 1,
    weight = 1,
    label = ~paste0(
      round(percent_change),
      " percent change in ",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>%
  addLegend(
    data = pge_res_elec_covid,
    pal = res_pal,
    values = ~percent_change,
    title = "Percent Change in <br> April-June Residential <br> kWH, 2019-2020"
  )
For this map, I chose to represent “before” COVID as cumulative electricity use from April-June of 2019, and “after” COVID began as cumulative electricity use from April-June of 2020. I chose to compare 2019 to 2020 because I believe 2019 data provides the most relevant and up-to-date baseline for energy use in the Bay Area. I chose the months April-June because April was the first full month of California’s shelter-in-place order, and because June is the most recent month for which 2020 data is available. While the effects of COVID on energy consumption vary by neighborhood, with some zipcodes experiencing an increase of 20% or more in energy use and others actually experiencing a decrease, the majority of zipcodes have experienced some increase in residential electricity use since COVID began.
# Choropleth of the 2019 -> 2020 percent change in April-June commercial
# electricity use, by ZIP code.
# Note: this reassigns `res_pal`, replacing the palette built for the
# residential map; the name is kept so existing references still resolve.
res_pal <- colorBin(
  palette = "RdBu",
  domain = pge_comm_elec_covid$percent_change,
  bins = c(-90, -70, -50, -30, -10, 0, 25, 50, 125, 200, 300)
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = pge_comm_elec_covid,
    fillColor = ~res_pal(percent_change),
    color = "white",
    # Opacity values must lie in [0, 1]; the previous 1.5 and 2 were out of
    # range.
    opacity = 1,
    fillOpacity = 1,
    weight = 1,
    label = ~paste0(
      round(percent_change),
      " percent change in ",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>%
  addLegend(
    data = pge_comm_elec_covid,
    pal = res_pal,
    values = ~percent_change,
    title = "Percent Change in <br> April-June Commercial <br> kWH, 2019-2020"
  )
My choice in defining “before” COVID and “after” COVID began follows the same thought process as above. As with the percentage change in residential energy use, the change in commercial energy use also varies by neighborhood. However, as is clear from the map, the overwhelming majority of zipcodes have experienced a decrease in commercial electricity use, some as large as an 84% decrease. A few zipcodes, shown in dark blue, show a large increase in commercial energy use, but overall commercial electricity use has decreased across the Bay Area.